# Snakemake file for ChIP-seq analysis
# Author: Elise van Bree
# Date: 24-04-2018

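# Run the workflow from a working directory that contains the file hg19.chrom.sizes (used by the bigwig rule) and the file hg19.genome (passed to bedtools in the bedgraph rule), plus the UCSC program bedGraphToBigWig. NOTE: the bigwig rule calls bedGraphToBigWig by bare name, so it must be resolvable through PATH.
# Two directories above the working directory there should be a directory called bowtie that contains the indexed hg19 genome (../../bowtie/hg19); if the index lives elsewhere, change that path in the bowtie2 rule.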

# Load the pipeline settings. The rules in this file read config["samples"]:
# its keys supply the values of the {sample} wildcard, and each entry provides
# "forward" and "reverse" file-name stems that are looked up as
# 00_raw/<stem>.fastq.gz by the trimming rule.
configfile: "config_github.yaml"

# Default target rule: requests one bigWig track per entry in config["samples"],
# which pulls in every upstream rule of the pipeline.
rule all:
	input:
		# Alternative targets, kept for stopping the pipeline at an earlier stage.
		# Replace the active expand() call below with one of these:
		#   expand("00_raw/{sample}_R1.fastq.gz", sample=config["samples"]),
		#   expand("00_raw/{sample}_R2.fastq.gz", sample=config["samples"])
		#   expand("03_bowtie/{sample}.sam", sample=config["samples"])
		#   expand("04_bam/{sample}.bam", sample=config["samples"])
		#   expand("04_bam/{sample}_sorted.bam", sample=config["samples"])
		#   expand("04_bam/{sample}_sorted_rmdup.bam", sample=config["samples"])
		#   expand("05_bedgraph/{sample}_sorted_rmdup.bedgraph", sample=config["samples"])
		expand(
			"06_bigwig/{sample}.bw",
			sample=config["samples"]
		)
	message: "ChIP-seq pipeline succesfully run."



# Trim one paired-end sample with Trimmomatic in PE mode.
#
# Inputs : the forward/reverse raw reads named in config["samples"][<sample>],
#          resolved to 00_raw/<name>.fastq.gz.
# Outputs: paired and unpaired trimmed reads for both mates in 02_trimmed/.
#          They are passed to Trimmomatic in the order
#          forward-paired, forward-unpaired, reverse-paired, reverse-unpaired.
#
# The raw inputs already carry the .fastq.gz suffix, so they are NOT gzipped
# again here: running gzip on a *.gz file only yields an
# "already has .gz suffix -- unchanged" warning and a non-zero exit status,
# which makes the job fail after the trimming itself has finished.
#
# `threads` is declared so that "-threads {threads}" can exceed 1; Snakemake
# caps it at the number of cores given on the command line. Adjust as needed.
rule trimmomaticPaired:
	input:
		forward = lambda wildcards: "00_raw/" + config["samples"][wildcards.sample]["forward"] + ".fastq.gz",
		reverse = lambda wildcards: "00_raw/" + config["samples"][wildcards.sample]["reverse"] + ".fastq.gz"
	output:
		forward = "02_trimmed/{sample}_forward_trimmed_2.fastq",
		reverse = "02_trimmed/{sample}_reverse_trimmed_2.fastq",
		forwardUnpaired = "02_trimmed/{sample}_forward_Unpaired_trimmed_2.fastq",
		reverseUnpaired = "02_trimmed/{sample}_reverse_Unpaired_trimmed_2.fastq"
	threads: 4
	message: "Trimming {input.forward} and {input.reverse} using Trimmomatic."
	shell:
		"java -jar /zfs/datastore0/software/src/Trimmomatic-0.36/trimmomatic-0.36.jar PE "
		"-threads {threads} -phred33 "
		"{input.forward} {input.reverse} "
		"{output.forward} {output.forwardUnpaired} "
		"{output.reverse} {output.reverseUnpaired} "
		"ILLUMINACLIP:/zfs/datastore0/software/src/Trimmomatic-0.36/adapters/TruSeq3-PE.fa:2:30:10 "
		"LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:40"



# Align the trimmed reads of one sample to hg19 with bowtie2 and write a SAM file.
#
# Inputs : the paired (-1/-2) and unpaired (-U, comma-separated) trimmed FASTQ
#          files written by the trimming rule.
# Output : 03_bowtie/<sample>.sam
#
# Side effect: after mapping, the four trimmed FASTQ inputs are gzipped in place
# (they become *.fastq.gz) to save disk space, as announced in the message.
#
# `threads` is declared so that "-p {threads}" can exceed 1; Snakemake caps it
# at the number of cores given on the command line. Adjust as needed.
rule bowtie2:
	input:
		forward = "02_trimmed/{sample}_forward_trimmed_2.fastq",
		reverse = "02_trimmed/{sample}_reverse_trimmed_2.fastq",
		forwardUnpaired = "02_trimmed/{sample}_forward_Unpaired_trimmed_2.fastq",
		reverseUnpaired = "02_trimmed/{sample}_reverse_Unpaired_trimmed_2.fastq"
	output: "03_bowtie/{sample}.sam"
	threads: 4
	message: "Mapping files {input.forward} {input.reverse} {input.forwardUnpaired} and {input.reverseUnpaired} using bowtie2. Trimmed files will be gzipped after mapping is done. This will slow down the process, but save space of the server."
	shell:
		"bowtie2 --end-to-end --very-sensitive -X 600 -I 50 -p {threads} -q --mm "
		"-x ../../bowtie/hg19 -1 {input.forward} -2 {input.reverse} "
		"-U {input.forwardUnpaired},{input.reverseUnpaired} -S {output} ; "
		"gzip {input.forward} ; gzip {input.reverse} ; "
		"gzip {input.forwardUnpaired} ; gzip {input.reverseUnpaired}"
	# TODO: check the choice of -I and -X, the minimum and maximum fragment length for valid paired-end alignments.
	#	-X: the maximum fragment length for valid paired-end alignments (the command above uses 600)
	#	-I: the minimum fragment length for valid paired-end alignments (the command above uses 50)
	#	NOTE(review): earlier notes here listed -X 500 and -I 80; confirm which values are intended.

# Convert the bowtie2 SAM output of one sample to BAM with `samtools view -b`.
#
# Input  : 03_bowtie/<sample>.sam
# Output : 04_bam/<sample>.bam
#
# The command does not delete the SAM file, so the message no longer claims it
# does. If automatic clean-up is wanted, the usual Snakemake mechanism is to
# wrap the SAM path in temp() in the rule that produces it.
#
# `threads` is declared so that "-@ {threads}" can exceed 1; Snakemake caps it
# at the number of cores given on the command line. Adjust as needed.
rule bam:
	input:
		"03_bowtie/{sample}.sam"
	output:
		"04_bam/{sample}.bam"
	threads: 4
	message: "Converting {input} to BAM. This will result in a smaller file."
	shell:
		"samtools view -b -@ {threads} -o {output} {input}"



# Sort the BAM file of one sample with `samtools sort`.
#
# Input  : 04_bam/<sample>.bam
# Output : 04_bam/<sample>_sorted.bam
#
# The command does not delete the unsorted BAM, so the message no longer claims
# it does. If automatic clean-up is wanted, the usual Snakemake mechanism is to
# wrap the unsorted BAM path in temp() in the rule that produces it.
#
# `threads` is declared so that "-@ {threads}" can exceed 1; Snakemake caps it
# at the number of cores given on the command line. Adjust as needed.
rule sort:
	input:
		"04_bam/{sample}.bam"
	output:
		"04_bam/{sample}_sorted.bam"
	threads: 4
	message: "Sorting {input} using samtools. This will result in a smaller file."
	shell:
		"samtools sort -@ {threads} -o {output} {input}"



# Remove duplicate reads from the sorted BAM of one sample with `samtools rmdup`.
#
# Input  : 04_bam/<sample>_sorted.bam
# Output : 04_bam/<sample>_sorted_rmdup.bam
#
# The command does not delete its input, so the message no longer claims it
# does. If automatic clean-up is wanted, the usual Snakemake mechanism is to
# wrap the sorted BAM path in temp() in the rule that produces it.
rule rmdup:
	input:
		"04_bam/{sample}_sorted.bam"
	output:
		"04_bam/{sample}_sorted_rmdup.bam"
	message: "Removing duplicates in {input} using samtools."
	shell:
		"samtools rmdup {input} {output}"



# Produce a bedGraph coverage track for one sample by running
# `bedtools genomecov` (flags -bga and -ibam, genome file hg19.genome) on the
# duplicate-removed BAM and redirecting its standard output to the bedGraph file.
rule bedgraph:
	input: "04_bam/{sample}_sorted_rmdup.bam"
	output: "05_bedgraph/{sample}_sorted_rmdup.bedgraph"
	message: "Making bedgraph of {input} using bedtools. Bam file will be saved, since it is necessary for peak calling."
	shell:
		"bedtools genomecov -bga -ibam {input} "
		"-g hg19.genome "
		"> {output}"



# Convert the bedGraph track of one sample to bigWig with UCSC bedGraphToBigWig,
# using hg19.chrom.sizes from the working directory as the chromosome-size table.
rule bigwig:
	input: "05_bedgraph/{sample}_sorted_rmdup.bedgraph"
	output: "06_bigwig/{sample}.bw"
	message: "Converting {input} to bigwig using bedGraphToBigWig from UCSC."
	shell:
		"bedGraphToBigWig "
		"{input} hg19.chrom.sizes {output}"
